Predictor Data Preparation and Consolidation

Skyler Lewis 2025-01-02

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(habistat)
library(patchwork)
theme_set(theme_minimal())

Basic Flow Crosswalk via Drainage Area and Precipitation

See watershed_delineation.R for dependencies

make_flow_xw <- function(group_var) {
  habistat::flowline_attr |>
  select({{group_var}}, 
         comid = comid,
#         da_reach = da_area_sq_km_tot,
# 20241223: Switched to divergence-routed due to unexpected large vlaues in da_area_sq_km_tot, e.g. see comid 1889628
         da_reach = da_area_sq_km_div,
         pc_reach = da_ppt_mean_mm) |>
  drop_na({{group_var}}) |>
  group_by({{group_var}}) |>
  mutate(outlet_comid = comid[which.max(da_reach)]) |>
  mutate(da_outlet = da_reach[which(comid == outlet_comid)],
         pc_outlet = pc_reach[which(comid == outlet_comid)]) |>
  mutate(multiplier = (da_reach / da_outlet) * (pc_reach / pc_outlet)) |>
  arrange({{group_var}}, -multiplier)
}

cv_watersheds_flow_xw <- make_flow_xw(watershed_level_3)
cv_watersheds_flow_xw |> usethis::use_data(overwrite=T)
## ✔ Setting active project to
##   "C:/Users/skylerlewis/Github/swc-habitat-suitability-parallel".

## ✔ Saving "cv_watersheds_flow_xw" to "data/cv_watersheds_flow_xw.rda".

## ☐ Document your data (see <https://r-pkgs.org/data.html>).
cv_mainstems_flow_xw <- make_flow_xw(river_cvpia)
cv_mainstems_flow_xw |> usethis::use_data(overwrite=T)
## ✔ Saving "cv_mainstems_flow_xw" to "data/cv_mainstems_flow_xw.rda".
## ☐ Document your data (see <https://r-pkgs.org/data.html>).
cv_mainstem_groups_flow_xw <- make_flow_xw(river_group)
cv_mainstem_groups_flow_xw |> usethis::use_data(overwrite=T)
## ✔ Saving "cv_mainstem_groups_flow_xw" to "data/cv_mainstem_groups_flow_xw.rda".
## ☐ Document your data (see <https://r-pkgs.org/data.html>).
habistat::flowline_geom_proj |>
  inner_join(cv_watersheds_flow_xw, by=join_by(comid)) |>
  filter(comid %in% habistat::wua_predicted$comid) |>
  ggplot() + 
  geom_sf(aes(color = multiplier)) + 
  geom_sf(data = habistat::flowline_geom_proj |> 
            filter(comid %in% cv_watersheds_flow_xw$outlet_comid) |> 
            st_line_sample(sample=1), 
          aes(color = 1)) + 
  scale_color_viridis_c(direction = -1) +
  ggtitle("Flow Multipliers by Watershed")

habistat::flowline_geom_proj |>
  inner_join(cv_mainstems_flow_xw, by=join_by(comid)) |>
  filter(comid %in% habistat::wua_predicted$comid) |>
  ggplot() + 
  geom_sf(aes(color = multiplier)) + 
  geom_sf(data = habistat::flowline_geom_proj |> 
            filter(comid %in% cv_mainstems_flow_xw$outlet_comid) |> 
            st_line_sample(sample=1), 
          aes(color = 1)) + 
  scale_color_viridis_c(direction = -1) + 
  ggtitle("Flow Multipliers by Mainstem")

habistat::flowline_geom_proj |>
  inner_join(cv_mainstem_groups_flow_xw, by=join_by(comid)) |>
  filter(comid %in% habistat::wua_predicted$comid) |>
  ggplot() + 
  geom_sf(aes(color = multiplier)) + 
  geom_sf(data = habistat::flowline_geom_proj |> 
            filter(comid %in% cv_mainstem_groups_flow_xw$outlet_comid) |> 
            st_line_sample(sample=1), 
          aes(color = 1)) + 
  scale_color_viridis_c(direction = -1) + 
  ggtitle("Flow Multipliers by Mainstem Group")

Applying the crosswalk to aggregate model results

# habistat::wua_predicted <-
#     readRDS(here::here("data-raw", "results", "habistat::wua_predicted.Rds"))
# for backwards compatibility because habistat::wua_predicted is now pivoted

# load(here::here("data", "wua_predicted.Rda")) # this contains the ensemble predictions

wua_predicted_cv_watersheds <- 
  habistat::wua_predicted |>
  inner_join(cv_watersheds_flow_xw, 
             by=join_by(watershed_level_3, comid)) |>
  expand_grid(scaled = c(FALSE, TRUE)) |>
  mutate(flow_cfs = if_else(scaled, flow_cfs * multiplier, flow_cfs)) |>
  arrange(habitat, comid, flow_cfs) |> # , model_name, 
  group_by(habitat, comid) |> # , model_name, 
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) if_else(scaled, NA, v))) |>
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) zoo::na.approx(v, x = flow_cfs, na.rm=F, rule=2))) |>
  filter(scaled) |>
  select(-scaled) |>
  ungroup() |>
  group_by(habitat, watershed_level_3, flow_idx) |> # , model_name, 
  summarize(wua_per_lf_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_acres_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / 43560, 
            .groups="drop") |>
  inner_join(habistat::wua_predicted |> 
               group_by(flow_idx) |> 
               summarize(flow_cfs = first(flow_cfs), .groups="drop"),
             by = join_by(flow_idx))

# wua_predicted_cv_watersheds <- wua_predicted_cv_watersheds |>
#   pivot_wider(names_from = model_name, values_from = c(wua_per_lf_pred, wua_acres_pred)) |>
#   mutate(wua_per_lf_pred = (wua_per_lf_pred_SD + wua_per_lf_pred_SN) / 2,
#          wua_acres_pred = (wua_acres_pred_SD + wua_acres_pred_SN) / 2)

wua_predicted_cv_watersheds |> usethis::use_data(overwrite=T)
## ✔ Saving "wua_predicted_cv_watersheds" to
##   "data/wua_predicted_cv_watersheds.rda".

## ☐ Document your data (see <https://r-pkgs.org/data.html>).
wua_predicted_cv_mainstems <- 
  habistat::wua_predicted |> 
  inner_join(cv_mainstems_flow_xw, 
             by=join_by(river_cvpia, comid)) |>
  expand_grid(scaled = c(FALSE, TRUE)) |>
  mutate(flow_cfs = if_else(scaled, flow_cfs * multiplier, flow_cfs)) |>
  arrange(habitat, comid, flow_cfs) |> # , model_name, 
  group_by(habitat, comid) |>  # , model_name, 
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) if_else(scaled, NA, v))) |>
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) zoo::na.approx(v, x = flow_cfs, na.rm=F, rule=2))) |>
  filter(scaled) |>
  select(-scaled) |>
  ungroup() |>
  group_by(habitat, river_cvpia, flow_idx) |> # , model_name, 
  summarize(wua_per_lf_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_acres_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / 43560, 
            .groups="drop") |>
  inner_join(habistat::wua_predicted |> 
               group_by(flow_idx) |> 
               summarize(flow_cfs = first(flow_cfs), .groups="drop"),
             by = join_by(flow_idx)) 

# wua_predicted_cv_mainstems <- wua_predicted_cv_mainstems |>
#   pivot_wider(names_from = model_name, values_from = c(wua_per_lf_pred, wua_acres_pred)) |>
#   mutate(wua_per_lf_pred = (wua_per_lf_pred_SD + wua_per_lf_pred_SN) / 2,
#          wua_acres_pred = (wua_acres_pred_SD + wua_acres_pred_SN) / 2)

wua_predicted_cv_mainstems |> usethis::use_data(overwrite=T)
## ✔ Saving "wua_predicted_cv_mainstems" to "data/wua_predicted_cv_mainstems.rda".
## ☐ Document your data (see <https://r-pkgs.org/data.html>).
# these are the mainstems grouped for CVPIA model comparison
wua_predicted_cv_mainstems_grouped <- 
  habistat::wua_predicted |> 
  inner_join(cv_mainstem_groups_flow_xw, 
             by=join_by(river_group, comid)) |>
  expand_grid(scaled = c(FALSE, TRUE)) |>
  mutate(flow_cfs = if_else(scaled, flow_cfs * multiplier, flow_cfs)) |>
  arrange(habitat, comid, flow_cfs) |> # , model_name, 
  group_by(habitat, comid) |> # , model_name, 
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) if_else(scaled, NA, v))) |>
  mutate(across(c(wua_per_lf_pred, wua_per_lf_pred_SD, wua_per_lf_pred_SN),
                \(v) zoo::na.approx(v, x = flow_cfs, na.rm=F, rule=2))) |>
  filter(scaled) |>
  select(-scaled) |>
  ungroup() |>
  group_by(habitat, river_group, flow_idx) |>  # , model_name, 
  summarize(wua_per_lf_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_per_lf_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_acres_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SD = sum(wua_per_lf_pred_SD * reach_length_ft, na.rm=T) / 43560, 
            wua_acres_pred_SN = sum(wua_per_lf_pred_SN * reach_length_ft, na.rm=T) / 43560, 
            .groups="drop") |>
  inner_join(habistat::wua_predicted |> 
               group_by(flow_idx) |> 
               summarize(flow_cfs = first(flow_cfs), .groups="drop"),
             by = join_by(flow_idx))

# wua_predicted_cv_mainstems_grouped <- wua_predicted_cv_mainstems_grouped |>
#   pivot_wider(names_from = model_name, values_from = c(wua_per_lf_pred, wua_acres_pred)) |>
#   mutate(wua_per_lf_pred = (wua_per_lf_pred_SD + wua_per_lf_pred_SN) / 2,
#          wua_acres_pred = (wua_acres_pred_SD + wua_acres_pred_SN) / 2)

wua_predicted_cv_mainstems_grouped |> usethis::use_data(overwrite=T)
## ✔ Saving "wua_predicted_cv_mainstems_grouped" to
##   "data/wua_predicted_cv_mainstems_grouped.rda".
## ☐ Document your data (see <https://r-pkgs.org/data.html>).

Plotting the output

drainage_areas <-
  habistat::cv_watersheds |>
  group_by(watershed_level_3) |>
  summarize() |>
  transmute(river_group = watershed_level_3,
            area_ac = st_area(geometry) |>
                        units::set_units("acres") |>
                        units::drop_units()) |>
  st_drop_geometry() 

watershed_labels <-
  drainage_areas |>
  transmute(river_group,
            label = str_glue("{river_group}\n{format(round(area_ac, -3), big.mark=',')} ac")) |>
  deframe()
wua_predicted_cv_watersheds |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_level_3, scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, fill = habitat), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, color = habitat)) +
  scale_x_log10() +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(palette = "Dark2", aesthetics = c("color", "fill")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
  ggtitle("Watersheds (incl. tributaries)")

wua_predicted_cv_mainstems |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~river_cvpia, scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, fill = habitat), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, color = habitat)) +
  scale_x_log10() +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(palette = "Dark2", aesthetics = c("color", "fill")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
  ggtitle("Mainstems")

wua_predicted_cv_mainstems_grouped |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, fill = habitat), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, color = habitat)) +
  scale_x_log10() +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(palette = "Dark2", aesthetics = c("color", "fill")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
  ggtitle("Mainstems (grouped by watershed)")

Versus the “naive” method

wua_predicted |>
  filter(!is.na(river_group)) |>
  group_by(habitat, river_group, flow_idx, flow_cfs) |>
  summarize(wua_per_lf_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / sum(reach_length_ft, na.rm=T),
            wua_acres_pred = sum(wua_per_lf_pred * reach_length_ft, na.rm=T) / 43560, .groups="drop") |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_line(aes(y = wua_acres_pred, color = habitat)) +
  scale_x_log10() +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(palette = "Dark2", aesthetics = c("color", "fill")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow (cfs)") +
  ggtitle("Example incorrect method of aggregating - do not use")

DSMHabitat Comparison

Moved this over from model_cleaned.Rmd

mainstems_comid <- 
  read_sf(file.path("/vsizip", here::here("data-raw", "source", "rearing_spatial_data", "nhdplusv2_comid_habitat_xw.shp.zip"))) |>
  janitor::clean_names() |>
  st_zm() |>
  st_transform(st_crs(habistat::flowline_geom_proj)) |>
  mutate(length_ft = st_length(geometry) |> units::set_units("ft") |> units::drop_units()) |>
  filter(str_detect(habitat, "rearing")) |>
  left_join(habistat::flowline_attr |> 
              select(comid, hqt_gradient_class, river_group), by=join_by(comid)) |>
  filter(!(watershed %in% c("Sacramento River", "San Joaquin River")))

mainstems <-
  mainstems_comid |>
  group_by(watershed, river, hqt_gradient_class) |>
  summarize() 
## `summarise()` has grouped output by 'watershed', 'river'. You can override
## using the `.groups` argument.
mainstems_comid |> 
  ggplot() + 
  geom_sf(aes(group=river, color=hqt_gradient_class)) + 
  theme(legend.key.height = unit(12, "point")) +
  ggtitle("DSMhabitat reaches")

# http://cvpia-habitat-docs-markdown.s3-website-us-west-2.amazonaws.com/watershed/Regional_Approximation.html
# These are the watersheds that use regional approximation for instream rearing habitat
regional_approx_groups <- 
  c("Antelope Creek", "Bear Creek", "Big Chico Creek", 
    "Elder Creek", "Mill Creek", "Paynes Creek", "Stony Creek", "Thomes Creek")
# These are the watersheds that use regional approximation for instream spawning habitat
regional_approx_groups_spawning <- 
  c("Antelope Creek", "Bear Creek", "Big Chico Creek", "Cow Creek",
    "Elder Creek", "Mill Creek", "Paynes Creek", "Stony Creek", "Thomes Creek")
# These are the watersheds that use regional approximation for spawning and instream rearing in the san joaquin basin
regional_approx_groups_sanjoaquin <-
  c("Cosumnes River")

# These are the watersheds that use scaled proxies for floodplain habitat
deer_creek_fp_proxy <-
  c("Antelope Creek", "Bear Creek", "Cow Creek", "Mill Creek", "Paynes Creek") 
deer_creek_partial_model_scaled <- c("Big Chico Creek")
cottonwood_creek_fp_proxy <- c("Stony Creek", "Thomes Creek")
tuolumne_river_fp_proxy <- c("Calaveras River")
fp_proxy_groups <- c(deer_creek_fp_proxy, cottonwood_creek_fp_proxy, tuolumne_river_fp_proxy)

# These are reaches where suitability is already applied so we shouldn't be using DSMhabitat::apply_suitability to scale again. List is from the help docs for DSMhabitat::apply_suitability
suitability_already_applied <- c("Antelope Creek", "Battle Creek", "Bear Creek", "Cow Creek", "Deer Creek", "Mill Creek", "Paynes Creek", "Sacramento River", "Sutter Bypass", "Yolo Bypass", "North Delta", "South Delta")
# pretty sure that all the scaled watersheds are in this too...
suitability_already_applied <- unique(c(suitability_already_applied, fp_proxy_groups))

#remotes::install_github("CVPIA-OSC/DSMhabitat")
watersheds <- mainstems |> pull(watershed) |> unique()
watershed_name <- tolower(gsub(pattern = "-| ", replacement = "_", x = watersheds))
watershed_rda_name <- paste(watershed_name, "floodplain", sep = "_")

dsm_habitat_floodplain <- map_df(watershed_rda_name, function(watershed) {
  df <- as.data.frame(do.call(`::`, list(pkg = "DSMhabitat", name = watershed)))
}) |> 
  select(river_group = watershed,
         flow_cfs,
         ends_with("_floodplain_acres")) |>
  pivot_longer(cols = ends_with("_floodplain_acres"), 
               names_transform = \(x) str_replace(x, "_floodplain_acres", ""),
               names_to = "run",
               values_to = "suitable_ac") |>
               #values_to = "floodplain_acres") |>
  mutate(run = run |> factor(levels = c("FR", "LFR", "WR", "SR", "ST"),
                             labels = c("fall", "late fall", "winter", "spring", "steelhead")),
         hab = "floodplain" |> factor(levels = c("spawn", "fry", "juv", "adult", "floodplain"))) |>
  mutate(river_group = if_else(river_group == "Consumnes", "Cosumnes River", river_group))
         #floodplain_acres_suitable = if_else(river_group %in% suitability_already_applied,
         #                                    floodplain_acres,
         #                                    DSMhabitat::apply_suitability(floodplain_acres * 4046.86) / 4046.86))

dsm_habitat_instream <- map_df(paste(watershed_name, "instream", sep = "_"), 
                               possibly(function(watershed) {
                                 df <- as.data.frame(do.call(`::`, list(pkg = "DSMhabitat", name = watershed)))
                                 }, otherwise = NULL)) |> 
  select(river_group = watershed,
         flow_cfs,
         ends_with("_wua")) |>
  pivot_longer(cols = ends_with("_wua"), 
               names_transform = \(x) str_replace(x, "_wua", ""),
               names_to = "run_hab",
               values_to = "wua_per_1000ft") |>
  separate_wider_delim(run_hab, names = c("run", "hab"), delim = "_") |>
  mutate(wua_per_lf = wua_per_1000ft / 1000,
         run = run |> factor(levels = c("FR", "LFR", "WR", "SR", "ST"),
                             labels = c("fall", "late fall", "winter", "spring", "steelhead")), 
         # TODO: use "labels" to align these with habistat names like "fall" instead of "FR"
         hab = hab |> factor(levels = c("spawn", "fry", "juv", "adult", "floodplain"))) |>
  drop_na() |>
  select(-wua_per_1000ft)

dsm_habitat_instream_sqm <- map_df(paste(watershed_name, "instream", sep = "_"), 
                               possibly(function(watershed) {
                                 df <- as.data.frame(do.call(`::`, list(pkg = "DSMhabitat", name = watershed)))
                                 }, otherwise = NULL)) |> 
  select(river_group = watershed,
         flow_cfs,
         ends_with("_sqm")) |>
  pivot_longer(cols = ends_with("_sqm"), 
               names_transform = \(x) str_replace(x, "_sqm", ""),
               names_to = "run_hab",
               values_to = "suitable_sqm") |>
  separate_wider_delim(run_hab, names = c("run", "hab"), delim = "_") |>
  mutate(run = run |> factor(levels = c("FR", "LFR", "WR", "SR", "ST"),
                             labels = c("fall", "late fall", "winter", "spring", "steelhead")),
         hab = hab |> factor(levels = c("spawn", "fry", "juv", "adult", "floodplain"))) |>
  mutate(suitable_ac = suitable_sqm * (1 / 0.3048)^2 * (1 / 43560)) |>
  select(-suitable_sqm)

# flow ranges

dsm_flows <- bind_rows(dsm_habitat_floodplain, dsm_habitat_instream) |>
  group_by(river_group, flow_cfs) |>
  summarize() |>
  ungroup() |>
  arrange(river_group, flow_cfs)
## `summarise()` has grouped output by 'river_group'. You can override using the
## `.groups` argument.
dsm_flow_ranges <- 
  dsm_flows |> 
  group_by(river_group) |> 
  summarize(min_flow_cfs = min(flow_cfs), max_flow_cfs = max(flow_cfs))

mainstems_comid |> 
  st_zm() |> 
  filter(comid %in% mainstems_comid$comid) |>
  ggplot() + 
  geom_sf(aes(color=river_group)) + 
  theme(legend.key.height = unit(12, "point"))

cv_mainstems_spawning <- 
  habistat::cv_mainstems |>
  filter(str_detect(habitat, "spawning")) |>
  group_by(river_group) |>
  summarize() |>
  mutate(length_ft = st_length(geometry) |> units::set_units("ft") |> units::drop_units())

cv_mainstems_rearing <- 
  habistat::cv_mainstems |>
  filter(str_detect(habitat, "rearing")) |>
  group_by(river_group) |>
  summarize() |>
  mutate(length_ft = st_length(geometry) |> units::set_units("ft") |> units::drop_units())

cvpia_reach_lengths <- 
  bind_rows("spawn" = st_drop_geometry(cv_mainstems_spawning),
            "juv" = st_drop_geometry(cv_mainstems_rearing),
            "floodplain" = st_drop_geometry(cv_mainstems_rearing),
            .id = "hab") |>
  mutate(hab = factor(hab, levels = c("spawn", "juv", "floodplain")))

dsm_habitat_combined <-
  bind_rows("instream_wua" = 
              dsm_habitat_instream |>
              filter(hab %in% c("spawn", "juv")) |>
              drop_na(wua_per_lf),
            "instream_sqm" = 
              dsm_habitat_instream_sqm |>
              filter(hab %in% c("spawn", "juv")) |>
              drop_na(suitable_ac),
            "floodplain_acres" = 
              dsm_habitat_floodplain |>
              select(river_group, hab, run, flow_cfs, suitable_ac) |>
              drop_na(suitable_ac),
            .id = "source_datatype") |>
  inner_join(cvpia_reach_lengths, by=join_by(river_group, hab)) |>
  arrange(river_group, hab, run, flow_cfs) |>
  group_by(river_group, hab, run) |>
  mutate(wua_per_lf = zoo::na.approx(wua_per_lf, x = flow_cfs, na.rm=F),
         suitable_ac = zoo::na.approx(suitable_ac, x = flow_cfs, na.rm=F)) |>
  mutate(suitable_ac = coalesce(suitable_ac, (wua_per_lf * length_ft / 43560)),
         wua_per_lf = coalesce(wua_per_lf, (suitable_ac * 43560 / length_ft))) |>
  mutate(regional_approx = 
           ((hab == "floodplain") & ((river_group %in% c(deer_creek_fp_proxy, cottonwood_creek_fp_proxy, tuolumne_river_fp_proxy)))) |
           ((hab == "juv") & ((river_group %in% c(regional_approx_groups, regional_approx_groups_sanjoaquin)))) |
           ((hab == "spawn") & ((river_group %in% c(regional_approx_groups_spawning, regional_approx_groups_sanjoaquin)))))|>
  mutate(hab_type = case_when(hab == "juv" ~ "rearing",
                              hab == "spawn" ~ "spawning",
                              hab == "floodplain" ~ "rearing"),
         hab_subtype = case_when(hab == "juv" ~ "instream",
                                 hab == "spawn" ~ "instream",
                                 hab == "floodplain" ~ "floodplain"))
wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "rearing") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = paste("habistat", habitat)), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, color = paste("habistat", habitat))) +
  scale_x_log10() +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "rearing") |>
              filter(flow_cfs <= 15000), 
            aes(x = flow_cfs, y = suitable_ac, color = paste("DSMhabitat", hab_subtype), linetype = regional_approx)) +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(name = "Habitat Data Source", palette = "Dark2", aesthetics = c("color", "fill")) +
  scale_linetype_manual(name = "Regional Approx.", values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
    guides(color = guide_legend(nrow = 2), linetype = guide_legend(nrow = 2)) +
  ggtitle("Rearing Comparison - Total Acres")

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "spawning") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = paste("habistat", habitat)), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, color = paste("habistat", habitat))) +
  scale_x_log10() +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "spawning") |>
              filter(flow_cfs <= 15000), 
            aes(x = flow_cfs, y = suitable_ac, color = paste("DSMhabitat", "spawning"), linetype = regional_approx)) +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(name = "Habitat Data Source", palette = "Dark2", aesthetics = c("color", "fill")) +
  scale_linetype_manual(name = "Regional Approx.", values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
    guides(color = guide_legend(nrow = 2), linetype = guide_legend(nrow = 2)) +
  ggtitle("Spawning Comparison - Total Acres")
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "rearing") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_per_lf_pred_SD, ymax = wua_per_lf_pred_SN, 
                  fill = paste("habistat", habitat)), alpha=0.33) +
  geom_line(aes(y = wua_per_lf_pred, color = paste("habistat", habitat))) +
  scale_x_log10() +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "rearing") |>
              filter(flow_cfs <= 15000), 
            aes(x = flow_cfs, y = wua_per_lf, color = paste("DSMhabitat", hab_subtype), linetype = regional_approx)) +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(name = "Habitat Data Source", palette = "Dark2", aesthetics = c("color", "fill")) +
  scale_linetype_manual(name = "Regional Approx.", values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  ylab("Predicted Habitat (ft2) per LF") + xlab("Flow at Outlet (cfs)") + 
    guides(color = guide_legend(nrow = 2), linetype = guide_legend(nrow = 2)) +
  ggtitle("Rearing Comparison - Suitable Area per Linear Ft")

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "spawning") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_per_lf_pred_SD, ymax = wua_per_lf_pred_SN, 
                  fill = paste("habistat", habitat)), alpha=0.33) +
  geom_line(aes(y = wua_per_lf_pred, color = paste("habistat", habitat))) +
  scale_x_log10() +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "spawning") |>
              filter(flow_cfs <= 15000), 
            aes(x = flow_cfs, y = wua_per_lf, color = paste("DSMhabitat", "spawning"), linetype = regional_approx)) +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(name = "Habitat Data Source", palette = "Dark2", aesthetics = c("color", "fill")) +
  scale_linetype_manual(name = "Regional Approx.", values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  ylab("Predicted Habitat (ft2) per LF") + xlab("Flow at Outlet (cfs)") + 
    guides(color = guide_legend(nrow = 2), linetype = guide_legend(nrow = 2)) +
  ggtitle("Spawning Comparison - Suitable Area per Linear Ft")
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

Comparison with Regional Flow Approximation

Instream Rearing and Spawning

upper_mid_sac_tribs <- DSMhabitat::modeling_exist %>% 
  left_join(DSMhabitat::watershed_regions, by = c('Watershed' = 'watershed')) %>% 
  filter(region == 'Upper-mid Sacramento River', 
         Watershed != 'Upper-mid Sacramento River'
  )
watersheds_without_modeling <- upper_mid_sac_tribs %>%
  select(watershed = Watershed, contains('spawn')) %>% 
  gather(species, spawn_modeling, -watershed) %>% 
  group_by(watershed) %>% 
  summarise(no_modeling = sum(spawn_modeling, na.rm = T) == 0) %>% 
  filter(no_modeling) %>% 
  pull(watershed)
cat(c("Watersheds without any Spawning (spawn) Model:", paste(watersheds_without_modeling, collapse = ", "), "\n"))
## Watersheds without any Spawning (spawn) Model: Antelope Creek, Bear Creek, Big Chico Creek, Cow Creek, Elder Creek, Mill Creek, Paynes Creek, Stony Creek, Thomes Creek
watersheds_with_spawn <- upper_mid_sac_tribs %>% 
  filter(!(Watershed %in% watersheds_without_modeling)) %>% 
  pull(Watershed)
cat(c("Watersheds with Spawning (FR spawn) Model:", paste(watersheds_with_spawn, collapse = ", "), "\n"))
## Watersheds with Spawning (FR spawn) Model: Battle Creek, Butte Creek, Clear Creek, Cottonwood Creek, Deer Creek
watersheds_with_fry <- upper_mid_sac_tribs %>% 
  filter(FR_fry) %>% 
  pull(Watershed)
cat(c("Watersheds with Rearing (FR fry) Model:", paste(watersheds_with_fry, collapse = ", "), "\n"))
## Watersheds with Rearing (FR fry) Model: Battle Creek, Clear Creek, Cottonwood Creek, Cow Creek
watersheds_with_juv <- upper_mid_sac_tribs %>% 
  filter(FR_juv) %>% 
  pull(Watershed)
cat(c("Watersheds with Rearing (FR juv) Model:", paste(watersheds_with_juv, collapse = ", "), "\n"))
## Watersheds with Rearing (FR juv) Model: Battle Creek, Clear Creek, Cottonwood Creek, Cow Creek, Deer Creek
river_lengths <- 
  mainstems_comid |> # note this is a general, not run-specific habitat extent dataset
  st_drop_geometry() |>
  group_by(watershed, habitat) |>
  summarize(length_ft = sum(length_ft)) |>
  pivot_wider(names_from = habitat, 
              values_from = length_ft, 
              names_repair = janitor::make_clean_names) |>
  transmute(river_group = watershed, 
            juv = coalesce(rearing, 0) + coalesce(rearing_and_spawning, 0),
            spawn = coalesce(rearing_and_spawning, 0)) |>
  pivot_longer(cols = c(juv, spawn),
               names_to = "hab",
               values_to = "length_ft") |>
  mutate(length_rm = length_ft / 5280) |>
  glimpse()
## `summarise()` has grouped output by 'watershed'. You can override using the
## `.groups` argument.

## Rows: 48
## Columns: 5
## Groups: watershed [24]
## $ watershed   <chr> "American River", "American River", "Antelope Creek", "Ant…
## $ river_group <chr> "American River", "American River", "Antelope Creek", "Ant…
## $ hab         <chr> "juv", "spawn", "juv", "spawn", "juv", "spawn", "juv", "sp…
## $ length_ft   <dbl> 118816.94, 99546.05, 160133.24, 126501.58, 127772.02, 1215…
## $ length_rm   <dbl> 22.50321, 18.85342, 30.32826, 23.95863, 24.19925, 23.02454…
interp_flows <- seq_log10(50, 15000, 0.05, snap=100) # to align with habistat flows

regional_approx_curve <- 
  DSMhabitat::upper_mid_sac_region_instream |>
  # select spawn and juv and convert WUA/1000LF to WUA/LF
  transmute(flow_cfs, 
            spawn = FR_spawn_wua / 1000, 
            juv = FR_juv_wua / 1000) |>
  pivot_longer(cols = c(spawn, juv), names_to = "hab", values_to = "wua_per_lf") |>
  # interpolate onto the habistat flow sequence
  group_by(hab) |>
  reframe(wua_per_lf = approx(y = wua_per_lf, x = log(flow_cfs), xout = log(interp_flows), na.rm = F)$y,
          flow_cfs = interp_flows) |>
  drop_na()

regional_approx_curve |>
  ggplot() + 
  geom_line(aes(x = flow_cfs, y = wua_per_lf, color = hab)) +
  scale_x_log10() +
  scale_y_continuous(limits = c(0, NA), 
                     breaks = scales::breaks_width(2), 
                     expand = c(0, 0)) +
  annotation_logticks(sides = "b") + 
  theme(panel.grid.minor = element_blank(),
        strip.placement = "outside",
        strip.text.y.left = element_text(angle = 0)) + 
  labs(title = "Upper-Mid Sacramento Regional Approximation",
       subtitle = "Fall Run Chinook Salmon",
       caption = paste("Spawning (spawn) based on: Battle Creek, Butte Creek, Clear Creek.",
                       "Rearing (juv) based on: Battle Creek, Butte Creek, Clear Creek, Cow Creek.",
                       sep = "\n")) +
  xlab("Flow (cfs)") +
  ylab("Suitable Habitat Area (ft2) per linear ft") +
  scale_color_brewer(name = "Habitat Type", palette = "Dark2", aesthetics = c("color", "fill")) 

instream_regional_approx_est <- 
  river_lengths |>
  inner_join(regional_approx_curve, 
             by = join_by(hab), 
             relationship = "many-to-many") |>
  mutate(suitable_ac = wua_per_lf * length_ft / 43560,
         habitat = case_when(hab == "spawn" ~ "spawning",
                             hab == "juv" ~ "rearing"))

instream_regional_approx_est  |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_line(aes(y = suitable_ac, color = habitat)) +
  scale_x_log10() +
  theme(legend.position = "top", panel.grid.minor = element_blank()) + 
  scale_color_brewer(name = "Habitat Type", palette = "Dark2", aesthetics = c("color", "fill")) +
  scale_linetype_manual(name = "Regional Approx.", values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  ylab("Predicted Total Habitat (acres)") + xlab("Flow at Outlet (cfs)") + 
  guides(color = guide_legend(nrow = 2), linetype = guide_legend(nrow = 2)) +
  ggtitle("Instream Regional Approximation Applied to All Watersheds")
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's linetype values.

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "rearing") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  filter(river_group %in% upper_mid_sac_tribs$Watershed) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = "habistat"), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, 
                linetype = "combined instream + floodplain",
                color = "habistat")) +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "rearing") |>
              filter(flow_cfs <= 15000) |>
              #filter(!regional_approx) |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed), 
            aes(x = flow_cfs, 
                y = suitable_ac, 
                linetype = hab_subtype,
                color = paste("DSMHabitat", if_else(regional_approx, "approximation", "hydraulic model")))) +
  geom_line(data = instream_regional_approx_est |>
              filter(habitat == "rearing") |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed),
            aes(y = suitable_ac,, 
                linetype = "instream",
                color = "DSMHabitat regional approx")) +
  scale_x_log10() +
  scale_linetype_manual(name = "Habitat Type", 
                     values = c("instream" = "solid", #"#6388b4",
                                "floodplain" = "dashed", #"#ef6f6a",
                                "combined instream + floodplain" = "dotted")) + ##bb7693"),) +
  scale_color_manual(name = "Data Source", 
                     values = c("habistat" = "#ffae34", 
                                "DSMHabitat hydraulic model" = "#6388b4",
                                "DSMHabitat regional approx" = "#ef6f6a"),
                     aesthetics = c("color", "fill")) +
  theme(legend.position = "top", 
        panel.grid.minor = element_blank()) + 
  guides(color = guide_legend(nrow = 3), 
         linetype = guide_legend(nrow = 3)) +
  ylab("Predicted Total Habitat (acres)") + 
  xlab("Flow at Outlet (cfs)") + 
  ggtitle("Rearing Comparison for Upper-Mid Sacramento Tributaries - Total Acres")

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "spawning") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  filter(river_group %in% upper_mid_sac_tribs$Watershed) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = "habistat"), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, 
                linetype = "instream",
                color = "habistat")) +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "spawning") |>
              filter(flow_cfs <= 15000) |>
              #filter(!regional_approx) |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed), 
            aes(x = flow_cfs, 
                y = suitable_ac, 
                linetype = hab_subtype,
                color = paste("DSMHabitat", if_else(regional_approx, "approximation", "hydraulic model")))) +
  geom_line(data = instream_regional_approx_est |>
              filter(habitat == "spawning") |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed),
            aes(y = suitable_ac,, 
                linetype = "instream",
                color = "DSMHabitat regional approx")) +
  scale_x_log10() +
  scale_linetype_manual(name = "Habitat Type", 
                     values = c("instream" = "solid", #"#6388b4",
                                "floodplain" = "dashed", #"#ef6f6a",
                                "combined instream + floodplain" = "dotted")) + ##bb7693"),) +
  scale_color_manual(name = "Data Source", 
                     values = c("habistat" = "#ffae34", 
                                "DSMHabitat hydraulic model" = "#6388b4",
                                "DSMHabitat regional approx" = "#ef6f6a"),
                     aesthetics = c("color", "fill")) +
  theme(legend.position = "top", 
        panel.grid.minor = element_blank()) + 
  guides(color = guide_legend(nrow = 3), 
         linetype = guide_legend(nrow = 3)) +
  ylab("Predicted Total Habitat (acres)") + 
  xlab("Flow at Outlet (cfs)") + 
  ggtitle("Spawning Comparison for Upper-Mid Sacramento Tributaries - Total Acres")

Floodplain

# from DSMhabitat data-raw/watershed/floodplain_utils.R and modified for this use case 

# attempt at dec_jun_mean_flow_scaling value replication... 
# DSMflow::flows_cfs$biop_itp_2018_2019 |> select(date, `Antelope Creek`, `Deer Creek`) |> janitor::clean_names() |> filter(month(date) %in% c(6,12)) |> mutate(scale = antelope_creek/deer_creek) 

scale_fp_flow_area <- function(ws, proxy_ws, flow_scalar) {

  watershed_metadata <- filter(DSMhabitat::floodplain_modeling_metadata, 
                                watershed == ws)
  
  # optional manual overrides of values that are pulled from watershed_metadata:
  selected_proxy_ws <- if(missing(proxy_ws)) watershed_metadata$scaling_watershed else proxy_ws
  selected_flow_scalar <- if(missing(flow_scalar)) watershed_metadata$dec_jun_mean_flow_scaling else flow_scalar
  
  low_gradient <- filter(DSMhabitat::low_gradient_lengths, watershed_name == ws)
  
  rearing_extents <- filter(DSMhabitat::watershed_lengths, watershed == ws, lifestage == "rearing") %>% 
    select(watershed, species, miles) %>% 
    spread(species, miles)
  
  proxy_watershed_metadata <- filter(DSMhabitat::floodplain_modeling_metadata, 
                                      watershed == selected_proxy_ws)
  
  species_present <- filter(DSMhabitat::watershed_species_present, 
                            watershed_name == ws)  
  
  
  proxy_data <- switch(selected_proxy_ws,
                       "Deer Creek" = DSMhabitat::deer_creek_floodplain,
                       "Tuolumne River" = DSMhabitat::tuolumne_river_floodplain,
                       "Cottonwood Creek" = DSMhabitat::cottonwood_creek_floodplain)
  
  # scale flow
  scaled_flow <- round(proxy_data$flow_cfs * selected_flow_scalar)
  
  # fall run area
  # divide floodplain area by watershed length of proxy watershed to get area/mile, scale to hydrology
  scaled_area_per_mile_FR <- (proxy_data$FR_floodplain_acres / proxy_watershed_metadata$modeled_length_mi) *
  selected_flow_scalar
  
  # apportion area by high gradient/low gradient, .1 is downscaling for high gradient
  fp_area_FR <- round((scaled_area_per_mile_FR * low_gradient$fr) +
    (scaled_area_per_mile_FR * (rearing_extents$fr - low_gradient$fr) * 0.1), 2)
  
  result <- data.frame(
    flow_cfs = scaled_flow,
    FR_floodplain_acres = fp_area_FR
  )
  
  if (species_present$lfr) {
    # latefall floodplain area
    lfr_proxy <- if ("LFR_floodplain_acres" %in% colnames(proxy_data)) {
      proxy_data$LFR_floodplain_acres
    } else {
      proxy_data$FR_floodplain_acres 
    }
    
    scaled_area_per_mile_LFR <- (lfr_proxy / proxy_watershed_metadata$modeled_length_mi) *
      selected_flow_scalar
    
    fp_area_LFR <-round((scaled_area_per_mile_LFR * low_gradient$lfr) +
      (scaled_area_per_mile_LFR * (rearing_extents$lfr - low_gradient$lfr) * 0.1), 2)
    
    result <- bind_cols(result, LFR_floodplain_acres = fp_area_LFR)
  }
  
  if (species_present$wr) {
    # winter run floodplain area
   wr_proxy <- if ("WR_floodplain_acres" %in% colnames(proxy_data)) {
     proxy_data$WR_floodplain_acres
   } else {
     proxy_data$FR_floodplain_acres 
   }
   
    scaled_area_per_mile_WR <- (wr_proxy / proxy_watershed_metadata$modeled_length_mi) *
      selected_flow_scalar
    
    fp_area_WR <-round((scaled_area_per_mile_WR * low_gradient$wr) +
      (scaled_area_per_mile_WR * (rearing_extents$wr - low_gradient$wr) * 0.1), 2)
    
    result <- bind_cols(result, WR_floodplain_acres = fp_area_WR) 
  }
  
  if (species_present$sr) {
    # spring run floodplain area
    sr_proxy <- if ("SR_floodplain_acres" %in% colnames(proxy_data)) {
      proxy_data$SR_floodplain_acres
    } else {
      if ("ST_floodplain_acres" %in% colnames(proxy_data)) {
        proxy_data$ST_floodplain_acres 
      } else {
        proxy_data$FR_floodplain_acres 
      }
    }
    
    scaled_area_per_mile_SR <- (sr_proxy / proxy_watershed_metadata$modeled_length_mi) *
      selected_flow_scalar
    
    fp_area_SR <- round((scaled_area_per_mile_SR * low_gradient$sr) +
      (scaled_area_per_mile_SR * (rearing_extents$sr - low_gradient$sr) * 0.1), 2)
    
    result <- bind_cols(result, SR_floodplain_acres = fp_area_SR)
  }
  
  if (species_present$st) {
    # steelhead floodplain area
    st_proxy <- if ("ST_floodplain_acres" %in% colnames(proxy_data)) {
      proxy_data$ST_floodplain_acres
    } else {
      if ("SR_floodplain_acres" %in% colnames(proxy_data)) {
        proxy_data$SR_floodplain_acres 
      } else {
        proxy_data$FR_floodplain_acres 
      }
    }
    
    scaled_area_per_mile_ST <- (st_proxy / proxy_watershed_metadata$modeled_length_mi) *
      selected_flow_scalar
    
    fp_area_ST <- round((scaled_area_per_mile_ST * low_gradient$st) +
      (scaled_area_per_mile_ST * (rearing_extents$st - low_gradient$st) * 0.1), 2)
    
    result <- bind_cols(result, ST_floodplain_acres = fp_area_ST)
  }
  
  result <- result |> 
    mutate(watershed = ws, 
           proxy_watershed = selected_proxy_ws)
  
  return(mutate(result, watershed = ws))
}
# comparison file: 
# wua_predicted_cv_mainstems_grouped

watersheds_fp_scale <- DSMhabitat::floodplain_modeling_metadata |> 
  filter(!is.na(dec_jun_mean_flow_scaling)) |> 
  pull(watershed)

full_fp_results <- data.frame()
for(i in 1:length(watersheds_fp_scale)) {
  result <- scale_fp_flow_area(watersheds_fp_scale[i])
  full_fp_results <- bind_rows(full_fp_results, result)
}

full_fp_results |> 
  ggplot(aes(x = flow_cfs, y = FR_floodplain_acres, color = proxy_watershed)) + 
  geom_line() +
  facet_wrap(~watershed, scales = "free_y") + 
  scale_x_log10(breaks = scales::breaks_log(),
                labels = scales::label_number()) +
  theme(panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90)) 

# full_deer_creek_fp_results <- data.frame()
# for(i in 1:length(deer_creek_fp_proxy)) {
#   result <- scale_fp_flow_area(deer_creek_fp_proxy[i])
#   full_deer_creek_fp_results <- bind_rows(full_deer_creek_fp_results, result)
# }
# 
# flows <- c(DSMhabitat::mill_creek_floodplain$flow_cfs, DSMhabitat::cow_creek_floodplain$flow_cfs)
# 
# # get_approx <- function(df, species_column) {
# #   approxfun(df$flow_cfs, df[,species_column, drop = TRUE], rule = 2)
# # }
# 
# full_deer_creek_fp_results |> 
#   select(flow_cfs, floodplain_acres = FR_floodplain_acres, watershed) |> 
#   ggplot(aes(x = flow_cfs, y = floodplain_acres, color = watershed)) + 
#   geom_line() + 
#   ggtitle("Deer Creek Floodplain Proxy Approximations")
# 
# full_cottonwood_creek_fp_results <- data.frame()
# for(i in 1:length(cottonwood_creek_fp_proxy)) {
#   result <- scale_fp_flow_area(cottonwood_creek_fp_proxy[i])
#   full_cottonwood_creek_fp_results <- bind_rows(full_cottonwood_creek_fp_results, result)
# }
# 
# full_cottonwood_creek_fp_results |> 
#   select(flow_cfs, floodplain_acres = FR_floodplain_acres, watershed) |> 
#   ggplot(aes(x = flow_cfs, y = floodplain_acres, color = watershed)) + 
#   geom_line() + 
#   ggtitle("Cottonwood Creek Floodplain Proxy Approximations")
floodplain_proxy_est_v1 <- 
  DSMhabitat::floodplain_modeling_metadata |> 
  filter(!is.na(dec_jun_mean_flow_scaling)) |>
  mutate(result = pmap(list(watershed, scaling_watershed, dec_jun_mean_flow_scaling),
                       scale_fp_flow_area)) |>
  rename(river_group = watershed) |>
  unnest(result) |>
  mutate(habitat = "floodplain") |>
  pivot_longer(ends_with("_floodplain_acres"),
               names_transform = \(x) str_replace(x, "_floodplain_acres", ""),
               names_to = "run", 
               values_to = "floodplain_ac") |>
  mutate(run = factor(run, 
                      levels = c("FR", "LFR", "WR", "SR", "ST"),
                      labels = c("fall", "late fall", "winter", "spring", "steelhead"))) |>
  mutate(suitable_ac = #if_else(river_group %in% suitability_already_applied,
                               floodplain_ac,
                      #         DSMhabitat::apply_suitability(floodplain_ac * 4046.86) / 4046.86)
         )
wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "rearing") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  filter(river_group %in% upper_mid_sac_tribs$Watershed) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = "habistat"), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, 
                linetype = "combined instream + floodplain",
                color = "habistat")) +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "rearing") |>
              filter(flow_cfs <= 15000) |>
              #filter(!regional_approx) |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed), 
            aes(x = flow_cfs, 
                y = suitable_ac, 
                linetype = hab_subtype,
                color = paste("DSMHabitat", if_else(regional_approx, "approximation", "hydraulic model")))) +
  geom_line(data = instream_regional_approx_est |>
              filter(habitat == "rearing") |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed),
            aes(y = suitable_ac,, 
                linetype = "instream",
                color = "DSMHabitat approximation")) +
  geom_line(data = floodplain_proxy_est_v1 |>
              filter(habitat == "floodplain" & run == "fall") |>
              filter(river_group %in% upper_mid_sac_tribs$Watershed),
            aes(y = suitable_ac,, 
                linetype = "floodplain",
                color = "DSMHabitat approximation")) +
  scale_x_log10() +
  scale_linetype_manual(name = "Habitat Type", 
                     values = c("instream" = "solid", #"#6388b4",
                                "floodplain" = "dashed", #"#ef6f6a",
                                "combined instream + floodplain" = "dotted")) + ##bb7693"),) +
  scale_color_manual(name = "Data Source", 
                     values = c("habistat" = "#ffae34", 
                                "DSMHabitat hydraulic model" = "#6388b4",
                                "DSMHabitat approximation" = "#ef6f6a"),
                     aesthetics = c("color", "fill")) +
  theme(legend.position = "top", 
        panel.grid.minor = element_blank()) + 
  guides(color = guide_legend(nrow = 3), 
         linetype = guide_legend(nrow = 3)) +
  ylab("Predicted Total Habitat (acres)") + 
  xlab("Flow at Outlet (cfs)") + 
  ggtitle("Rearing Comparison for Upper-Mid Sacramento Tributaries - Total Acres")

Replicate monthly flow scaling

Streams: https://www.sciencebase.gov/catalog/item/61e7442ed34e3618e01cf68f Monthly inflows: https://www.sciencebase.gov/catalog/item/61e74499d34e3618e01cf694 Monthly diversions: https://www.sciencebase.gov/catalog/item/61e74461d34e3618e01cf691

Version using CVHM2:

inflow_locs <- 
  read_sf(here::here("data-raw", "source", "monthly_flows", "CVHM2_InflowLocations.shp.zip")) |>
  janitor::clean_names()

inflow_vals <- 
  read_csv(here::here("data-raw", "source", "monthly_flows", "CVHM2_Inflows_1921_2019.csv")) |>
  pivot_longer(cols = c(-Month, -Year, -YYYYMM), 
               names_to = "inflow_id",
               values_to = "flow_cmd") |>
  janitor::clean_names() 
## Rows: 1181 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (65): Month, Year, YYYYMM, SACR_205, COWC_211, BATT_220, COTT_NF, COTT_M...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
inflow_xw <- 
  read_csv(here::here("data-raw", "source", "monthly_flows", "CVHM2_Inflows_XW.csv"))
## Rows: 65 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, inflow_id, river_group
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mean_monthly_inflow <-
  inflow_vals |>
  group_by(inflow_id, month) |>
  summarize(flow_cmd = mean(flow_cmd)) |>
  # convert cubic meters per day to cubic feet per second
  mutate(flow_cfs = flow_cmd * (1 / 0.3048)^3 * (1 / 86400)) |>
  inner_join(inflow_xw, by = join_by(inflow_id)) %>%
  inner_join(. |>
               filter(inflow_id %in% c("DEER_256", "COTT_MF", "TUOL_135")) |>
               select(inflow_id, month, flow_cfs) |>
               pivot_wider(names_from = inflow_id, values_from = flow_cfs) |>
               rename(q_deer = DEER_256,
                      q_cottonwood = COTT_MF,
                      q_tuolumne = TUOL_135),
             by = join_by(month))
## `summarise()` has grouped output by 'inflow_id'. You can override using the
## `.groups` argument.
flow_scalars <- 
  mean_monthly_inflow |>
  filter(month %in% c(12, 1, 2, 3, 4, 5, 6)) |>
  group_by(river_group) |>
  summarize(across(c(flow_cfs, starts_with("q_")), mean)) |>
  mutate(qscal_cottonwood = flow_cfs / q_cottonwood,
         qscal_deer = flow_cfs / q_deer,
         qscal_tuolumne = flow_cfs / q_tuolumne)

# flow_scalars |>
#   rename(scaling_flow = flow_cfs) |>
#   pivot_longer(cols = starts_with("qscal_"),
#                names_transform = \(x) str_replace(x, "qscal_", ""),
#                values_to = "flow_scalar") |>
#   mutate(proxy_ws = case_when(name == "cottonwood" ~ "Cottonwood Creek",
#                               name == "deer" ~ "Deer Creek",
#                               name == "tuolumne" ~ "Tuolumne River")) |>
#   mutate(result = pmap(list(river_group, proxy_ws, flow_scalar),
#                        possibly(scale_fp_flow_area, NA))) |>
#   unnest(result) |>
#   mutate(habitat = "floodplain") |>
#   pivot_longer(ends_with("_floodplain_acres"),
#                names_transform = \(x) str_replace(x, "_floodplain_acres", ""),
#                names_to = "run", 
#                values_to = "suitable_ac") |>
#   mutate(run = factor(run, 
#                       levels = c("FR", "LFR", "WR", "SR", "ST"),
#                       labels = c("fall", "late fall", "winter", "spring", "steelhead")))

Version using DSMFlow:

mean_dec_jun_flows <- 
  DSMflow::flows_cfs$biop_itp_2018_2019 |> 
  pivot_longer(cols = -date, names_to = "watershed", values_to = "flow_cfs") |>
  mutate(water_year = year(date) + if_else(month(date) >= 10, 1, 0)) |>
  filter(month(date) %in% c(12, 1, 2, 3, 4, 5, 6)) |>
  group_by(watershed) |>
  summarize(mean_flow_cfs = mean(flow_cfs))
#  select(date, `Antelope Creek`, `Deer Creek`) |> 
#  janitor::clean_names() |> 
#  filter(month(date) %in% c(6,12)) |> mutate(scale = antelope_creek/deer_creek)

proxy_watersheds <- 
  c("dc" = "Deer Creek", 
    "cc" = "Cottonwood Creek", 
    "tr" = "Tuolumne River")

proxy_flows <- 
  mean_dec_jun_flows |>
  filter(watershed %in% proxy_watersheds) |>
  pivot_wider(names_from = watershed, 
              values_from = mean_flow_cfs) |>
  rename_with(\(x) setNames(names(proxy_watersheds), proxy_watersheds)[x])

flow_scalars <- 
  mean_dec_jun_flows |>
  mutate(qscal_dc = mean_flow_cfs / proxy_flows$dc,
         qscal_cc = mean_flow_cfs / proxy_flows$cc,
         qscal_tr = mean_flow_cfs / proxy_flows$tr) |>
  left_join(DSMhabitat::watershed_regions, by = join_by(watershed)) |>
  left_join(DSMhabitat::floodplain_modeling_metadata |>
              select(watershed, scaling_watershed, dec_jun_mean_flow_scaling),
            by = join_by(watershed)) |>
  mutate(selected_proxy = case_when(
    watershed %in% proxy_watersheds ~ watershed,
    !is.na(scaling_watershed) ~ scaling_watershed,
    region %in% c("South Delta", "San Joaquin River") ~ "Tuolumne River",
    abs(mean_flow_cfs - proxy_flows$dc) <= abs(mean_flow_cfs - proxy_flows$cc) ~ "Deer Creek",
    abs(mean_flow_cfs - proxy_flows$dc) > abs(mean_flow_cfs - proxy_flows$cc) ~ "Cottonwood Creek"))

proxy_lookup <- 
  flow_scalars |>
  select(watershed, selected_proxy) |>
  deframe()

floodplain_proxy_est <-
  flow_scalars |>
  rename(river_group = watershed) |>
  filter(!str_detect(river_group, "Sacramento") & !str_detect(river_group, "San Joaquin")) |>
  pivot_longer(cols = starts_with("qscal_"),
               names_transform = \(x) proxy_watersheds[str_replace(x, "qscal_", "")],
               names_to = "proxy_ws",
               values_to = "flow_scalar") |>
  mutate(selected = (proxy_ws == proxy_lookup[river_group]),
         flow_scalar_combined = if_else(selected,
                                        coalesce(dec_jun_mean_flow_scaling, flow_scalar),
                                        flow_scalar),
         result = pmap(list(river_group, proxy_ws, flow_scalar_combined),
                       possibly(scale_fp_flow_area, NA))) |>
  drop_na(result) |>
  unnest(result) 

floodplain_proxy_est |>
  ggplot() +
  facet_wrap(~watershed_labels[river_group], scales = "free_y") +
  geom_line(aes(x = flow_cfs, y = FR_floodplain_acres, color = proxy_ws)) + 
  scale_x_log10(breaks = scales::breaks_log(),
                labels = scales::label_number()) +
  theme(panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90))

floodplain_proxy_est |>
  filter(selected) |>
  ggplot() +
  facet_wrap(~watershed_labels[river_group], scales = "free_y") +
  geom_line(aes(x = flow_cfs, y = FR_floodplain_acres, color = proxy_ws)) + 
  scale_x_log10(breaks = scales::breaks_log(),
                labels = scales::label_number()) +
  theme(panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90))

floodplain_proxy_est_pivot <- 
  floodplain_proxy_est |>
  filter(selected) |>
  mutate(habitat = "floodplain") |>
  pivot_longer(ends_with("_floodplain_acres"),
               names_transform = \(x) str_replace(x, "_floodplain_acres", ""),
               names_to = "run", 
               values_to = "floodplain_ac") |>
  mutate(run = factor(run, 
                      levels = c("FR", "LFR", "WR", "SR", "ST"),
                      labels = c("fall", "late fall", "winter", "spring", "steelhead"))) |>
  mutate(suitable_ac = if_else(river_group %in% suitability_already_applied,
                               floodplain_ac,
                               DSMhabitat::apply_suitability(floodplain_ac * 4046.86) / 4046.86))
# version summing instream and floodplain
dsm_habitat_combined_sum <-
  dsm_habitat_combined |>
  ungroup() |>
  filter(hab_type == "rearing" & hab %in% c("juv", "floodplain") & !regional_approx) |> # only produces this combined estimate when there are modeled values for both instream AND floodplain
  select(river_group, run, hab, flow_cfs, wua_per_lf, suitable_ac) |>
  group_by(river_group, run) |>
  arrange(river_group, run, hab, flow_cfs) |>
  complete(hab = c("juv", "floodplain"), flow_cfs) |>
  complete(hab = c("juv", "floodplain"), flow_cfs = interp_flows) |>
  group_by(river_group, run, hab) |>
  mutate(across(c(wua_per_lf, suitable_ac),
                \(v) zoo::na.approx(v, x = log(flow_cfs), na.rm = F, rule = 2:2))) |>
  group_by(river_group, run, flow_cfs) |>
  summarize(across(c(wua_per_lf, suitable_ac), sum))
## `summarise()` has grouped output by 'river_group', 'run'. You can override
## using the `.groups` argument.
dsm_habitat_combined_sum |>
  filter(run == "fall") |>
  ggplot() + 
  facet_wrap(~watershed_labels[river_group], scales = "free_y") +
  geom_line(data = dsm_habitat_combined |> 
              filter(run == "fall" & hab %in% c("juv", "floodplain")),
            aes(x = flow_cfs, y = suitable_ac, linetype = "original", color = hab)) +
  geom_line(aes(x = flow_cfs, y = suitable_ac, linetype = "combined")) +
  scale_x_log10() +
  scale_linetype_manual(values = c("original" = "solid", "combined" = "dashed"))

dsm_habitat_proxy_estimates_combined <-
  bind_rows("juv" = instream_regional_approx_est |>
              filter(hab == "juv") |>
              ungroup() |>
              select(river_group, flow_cfs, suitable_ac),
          "floodplain" = floodplain_proxy_est_pivot |>
            ungroup() |>
            filter(run == "fall") |>
            select(river_group, flow_cfs, suitable_ac),
          .id = "hab") 

dsm_habitat_proxy_estimates_combined_sum <-
  dsm_habitat_proxy_estimates_combined |>
  select(river_group, hab, flow_cfs, suitable_ac) |>
  group_by(river_group) |>
  arrange(river_group, hab, flow_cfs) |>
  complete(hab = c("juv", "floodplain"), flow_cfs) |>
  complete(hab = c("juv", "floodplain"), flow_cfs = interp_flows) |>
  group_by(river_group, hab) |>
  mutate(suitable_ac = zoo::na.approx(suitable_ac, 
                                      x = log(flow_cfs), 
                                      na.rm = F, 
                                      rule = 2:2)) |>
  group_by(river_group, flow_cfs) |>
  summarize(suitable_ac = sum(suitable_ac))
## `summarise()` has grouped output by 'river_group'. You can override using the
## `.groups` argument.
dsm_habitat_proxy_estimates_combined_sum |>
  ggplot() + 
  facet_wrap(~watershed_labels[river_group], scales = "free_y") +
  geom_line(data = dsm_habitat_proxy_estimates_combined,
            aes(x = flow_cfs, y = suitable_ac, linetype = "original", color = hab)) +
  geom_line(aes(x = flow_cfs, y = suitable_ac, linetype = "combined")) +
  scale_x_log10() +
  scale_linetype_manual(values = c("original" = "solid", "combined" = "dashed"))

wua_predicted_cv_mainstems_grouped |>
  filter(habitat == "rearing") |>
  filter(river_group %in% dsm_habitat_combined$river_group) |>
  ggplot(aes(x = flow_cfs)) + 
  facet_wrap(~watershed_labels[river_group], scales="free") + 
  geom_ribbon(aes(ymin = wua_acres_pred_SD, ymax = wua_acres_pred_SN, 
                  fill = "habistat"), alpha=0.33) +
  geom_line(aes(y = wua_acres_pred, 
                linetype = "combined instream + floodplain",
                color = "habistat")) +
  geom_line(data = instream_regional_approx_est |>
              filter(habitat == "rearing"), # |>
            aes(y = suitable_ac,, 
                linetype = "instream",
                color = "DSMHabitat approximation")) +
  geom_line(data = floodplain_proxy_est_pivot |>
              filter(habitat == "floodplain" & run == "fall"), # |>
            aes(y = suitable_ac,, 
                linetype = "floodplain",
                color = "DSMHabitat approximation")) +
  geom_line(data=dsm_habitat_combined |>
              filter(run == "fall") |>
              filter(hab_type == "rearing") |>
              filter(flow_cfs <= 15000),
            aes(x = flow_cfs, 
                y = suitable_ac, 
                linetype = hab_subtype,
                color = paste("DSMHabitat", if_else(regional_approx, "approximation", "hydraulic model")))) +
  geom_line(data = dsm_habitat_proxy_estimates_combined_sum,
            aes(y = suitable_ac,
                linetype = "combined instream + floodplain",
                color = "DSMHabitat approximation")) +
  scale_x_log10() +
  scale_linetype_manual(name = "Habitat Type", 
                     values = c("instream" = "solid",
                                "floodplain" = "dashed", 
                                "combined instream + floodplain" = "dotted")) +
  scale_color_manual(name = "Data Source", 
                     values = c("habistat" = "#ffae34", 
                                "DSMHabitat hydraulic model" = "#6388b4",
                                "DSMHabitat approximation" = "#ef6f6a"),
                     aesthetics = c("color", "fill")) +
  theme(legend.position = "top", 
        panel.grid.minor = element_blank()) + 
  guides(color = guide_legend(nrow = 3), 
         linetype = guide_legend(nrow = 3)) +
  ylab("Predicted Total Habitat (acres)") + 
  xlab("Flow at Outlet (cfs)") + 
  ggtitle("Rearing Comparison - Total Acres")

Simplified Plots

reach_groups <- list(
  "Watersheds with DSMHabitat Instream Regional Approximation" = regional_approx_groups,
  "Watersheds with DSMHabitat Floodplain Proxy Approximation" = fp_proxy_groups,
  "Watersheds in Habistat training dataset" = c("Deer Creek", "Mokelumne River", "Stanislaus River", "Yuba River"),
  "Watersheds with DSMHabitat full model data" = c("American River", "Bear River", "Clear Creek", "Cottonwood Creek","Feather River", "Tuolumne River"))

for (x in names(reach_groups)) {
  
  plt <- wua_predicted_cv_mainstems_grouped |>
    filter(habitat == "rearing") |>
    filter(river_group %in% reach_groups[[x]]) |>
    ggplot(aes(x = flow_cfs)) + 
    facet_wrap(~watershed_labels[river_group], scales="free_y") + 
    geom_line(aes(y = wua_acres_pred, color = "habistat")) +
    geom_line(data = dsm_habitat_combined_sum |>
                filter(run == "fall") |>
                filter(river_group %in% reach_groups[[x]]) |>
                filter(flow_cfs >= 50 & flow_cfs <= 15000),
              aes(y = suitable_ac,
                  color = "DSMHabitat hydraulic model")) +
    geom_line(data = dsm_habitat_proxy_estimates_combined_sum |>
                filter(river_group %in% reach_groups[[x]]) |>
                filter(flow_cfs >= 50 & flow_cfs <= 15000),
              aes(y = suitable_ac,
                  color = "DSMHabitat approximation")) +
    scale_color_manual(name = "Data Source", 
                       values = c("habistat" = "#ffae34", 
                                  "DSMHabitat hydraulic model" = "#6388b4",
                                  "DSMHabitat approximation" = "#ef6f6a"),
                       aesthetics = c("color", "fill")) +
    scale_x_log10() +
    theme(legend.position = "top", 
          panel.grid.minor = element_blank()) + 
    guides(color = guide_legend(nrow = 1)) +
    ylab("Predicted Total Habitat (acres)") + 
    xlab("Flow at Outlet (cfs)") + 
    ggtitle(x)
  
  print(plt)
  
}
## Warning: Removed 144 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_line()`).

# watersheds used in the proxy models:
ws_proxies <- 
  list("spawn" = c("Battle Creek", "Butte Creek", "Clear Creek", 
                   "Calaveras River", "Mokelumne River"), # Cottonwood not used 
       "juv" =   c("Battle Creek", "Butte Creek", "Clear Creek", "Cow Creek",
                   "Calaveras River", "Mokelumne River"), # Cottonwood not used
       "floodplain" = c("Deer Creek", "Cottonwood Creek", "Tuolumne River"))

# watersheds that use the proxy models:
ws_uses_proxies <- 
  list("spawn" = c(regional_approx_groups_spawning, regional_approx_groups_sanjoaquin),
       "juv" = c(regional_approx_groups, regional_approx_groups_sanjoaquin),
       "floodplain" = fp_proxy_groups)
# Consumnes River derived from average WUA of Calaveras River and Mokelumne River

# watersheds that have empirical data:
ws_has_model <- 
  dsm_habitat_combined |>
  filter(!regional_approx) |>
  group_by(hab, river_group) |> 
  summarize(.groups = "drop") |>
  nest(.by = hab) |>
  mutate(data = map(data, deframe)) |>
  deframe()

ws_proxy_summary <-
  bind_rows("spawn" = cv_mainstems_spawning,
            "juv" = cv_mainstems_rearing,
            "floodplain" = cv_mainstems_rearing,
            .id = "hab") |>
  mutate(hab = factor(hab, 
                      levels = c("spawn", "juv", "floodplain"),
                      labels = c("Spawning", "Rearing", "Floodplain"))) |>
  mutate(has_model = map2_lgl(river_group, hab, \(x, y) x %in% ws_has_model[[y]]),
         is_proxy = map2_lgl(river_group, hab, \(x, y) x %in% ws_proxies[[y]]),
         uses_proxy = map2_lgl(river_group, hab, \(x, y) x %in% ws_uses_proxies[[y]])) |>
    mutate(label = case_when(is_proxy ~ "Has Model (used as a proxy)",
                             has_model ~ "Has Model",
                             uses_proxy ~ "Does Not Have Model (uses a proxy)")) |>
  drop_na(label)

ws_proxy_summary |>
  st_drop_geometry() |>
  select(river_group, hab, label) |>
  pivot_wider(names_from = hab, values_from = label) |>
  knitr::kable()
river_group Spawning Rearing Floodplain
American River Has Model Has Model Has Model
Antelope Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Battle Creek Has Model (used as a proxy) Has Model (used as a proxy) Has Model
Bear Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Bear River Has Model Has Model Has Model
Big Chico Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Has Model
Butte Creek Has Model (used as a proxy) Has Model (used as a proxy) Has Model
Calaveras River Has Model (used as a proxy) Has Model (used as a proxy) Does Not Have Model (uses a proxy)
Clear Creek Has Model (used as a proxy) Has Model (used as a proxy) Has Model
Cosumnes River Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Has Model
Cottonwood Creek Has Model Has Model Has Model (used as a proxy)
Cow Creek Does Not Have Model (uses a proxy) Has Model (used as a proxy) Does Not Have Model (uses a proxy)
Deer Creek Has Model Has Model Has Model (used as a proxy)
Elder Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Has Model
Feather River Has Model Has Model Has Model
Merced River Has Model Has Model Has Model
Mill Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Mokelumne River Has Model (used as a proxy) Has Model (used as a proxy) Has Model
Paynes Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Stanislaus River Has Model Has Model Has Model
Stony Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Thomes Creek Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy) Does Not Have Model (uses a proxy)
Tuolumne River Has Model Has Model Has Model (used as a proxy)
Yuba River Has Model Has Model Has Model
ws_proxy_summary |>
  ggplot() + 
  facet_wrap(~hab, nrow = 1) +
  geom_sf(data = filter(cv_mainstems, habitat != "historical"), color = "lightgray", linetype = "dotted") +
  geom_sf(aes(color = label)) +
  theme(legend.position = "top") +
  guides(color = guide_legend(nrow = 1)) +
  scale_color_manual(name = "",
                     values = c("Has Model" = "#8cc2ca",
                                "Has Model (used as a proxy)" = "#6388b4",
                                "Does Not Have Model (uses a proxy)" = "#ef6f6a"))

knitr::knit_exit()